## Replication code for "City Limits to Partisan Polarization" by Amalie Jensen,
## William Marble, Kenneth Scheve, and Matthew Slaughter

# Makes density plot of estimated IMCE's (Fig. 4) as well as table of second-level
# partisan coefficients (Table 2).

library(rstan)
library(dplyr)
library(ggplot2)
library(ggridges)
library(ggpubr)
library(stringr)
theme_set(theme_bw())


select <- dplyr::select

# load parameter summaries --------------------------------------------------

# summary of parameters from summary.stanfit()
params = readRDS("Data/summary_socioecon_pid7.rds")
betas  = subset(params, grepl("beta", parname))

# parse the parameter names into respondent id, factor, and level 
fact =  str_extract(betas$parname, "beta_[a-z]*")
index = str_replace(betas$parname, fact, "")
index = str_replace(index, "(\\[)", "")
index = str_replace(index, "(\\])", "")
index = str_split(index, ",")
index = as.matrix(do.call(rbind, index))
betas$factor = fact
betas$level = as.numeric(index[, 2])
betas$respondent = as.numeric(index[,1])



# clean and label
betas$factor2 = car::recode(
  betas$factor, 
  "'beta_educ' = 'Education';
  'beta_hieduc' = 'Higher Education';
  'beta_invest' = 'Investment & Taxes';
  'beta_gov' = 'Governance'; 
  'beta_workers' = 'Workers & Entrepreneurs';
  'beta_local' = 'Local Services'"
)
educ_recode = "1 = 'Charter schools'; 2 = 'Vouchers to schools'; 3 = 'Free pre-school'; 4 = 'Pay teachers more'"
hieduc_recode = "1 = 'Community colleges'; 2 = 'Local public universities'; 3 = 'Technical vocational training'; 4 = 'Student grant programs'"
invest_recode = "1 = 'Attract new businesses'; 2 = 'Stimulate existing companies'; 3 = 'Encourage investment by charities'"
gov_recode = "1 = 'Consolidate local government'; 2 = 'More power to the state'"
workers_recode = "1 = 'Limit unions\\' power'; 2 = 'Expand unions\\' power'; 3 = 'Worker training vouchers'; 4 = 'Tax breaks to entrepreneurs'"
local_recode =  "1 = 'Affordable housing'; 2 = 'Public transportation'; 3 = 'Public safety and crime prevention'"

for (f in unique(betas$factor)) {
  rec = paste0(str_replace(f, "beta_", ""), "_recode")
  betas$level_code[betas$factor == f] = car::recode(betas$level[betas$factor == f], get(rec))
}

# reorder levels for visualization
lev_order = c("Keep current", 
              "Vouchers to schools", 
              "Pay teachers more", 
              "Free pre-school", 
              "Charter schools", 
              "Technical vocational training", 
              "Student grant programs", 
              "Local public universities", 
              "Community colleges", 
              "Stimulate existing companies", 
              "Encourage investment by charities", 
              "Attract new businesses", 
              "More power to the state", 
              "Consolidate local government", 
              "Worker training vouchers", 
              "Tax breaks to entrepreneurs", 
              "Limit unions\' power", 
              "Expand unions\' power", 
              "Public transportation", 
              "Public safety and crime prevention",
              "Affordable housing"
)
betas$level_code = factor(betas$level_code, levels = lev_order)
betas$factor2 = factor(
  betas$factor2, 
  levels = c("Investment & Taxes", "Workers & Entrepreneurs","Local Services",
             "Governance", "Education", "Higher Education")
  )



# plot --------------------------------------------------------------------

# function to get density of data at a pre-specified grid of points.
# used to compute density at the observed X values of the data
dens_at_grid = Vectorize(function(point, data, ...){
  stopifnot(length(point) == 1)
  density(data, from = point, to = point, n = 1, ...)$y
}, "point")

# get density estimates
betas = betas %>% 
  group_by(level_code) %>% 
  mutate(dens = dens_at_grid(mean, mean))

# get average IMCE (i.e. estimated AMCE)
means = betas %>% 
  group_by(level_code, factor2) %>% 
  summarise(mean = mean(mean))

betas %>% filter(dens >= quantile(dens, .025)) %>% 
ggplot() + 
  geom_vline(xintercept = 0, lty = 3) + 
  geom_point(data = means, aes(x = mean, y = level_code)) + 
  geom_ridgeline(aes(x = mean, y = level_code, height = dens), 
                 fill = "transparent", scale = .04, min_height = .01) + 
  labs(x = "Estimated Individual Marginal Component Effect", y = NULL) + 
  facet_wrap(~factor2, ncol = 1, drop = TRUE, scales = "free_y") + 
  scale_colour_manual(values = c("blue", "red"), name = NULL) + 
  theme(legend.position = "bottom", panel.spacing = unit(0, "in")) + 
  coord_cartesian(xlim = c(-.15, .15))
# ggsave("../PaperDrafts/ResearchPaper1/Graphs/conjoint_imce_densities.pdf", height=10, width=6)
ggsave("Output/fig4.pdf", height=10, width=6)
ggsave("Output/fig4.eps", height=10, width=6)




# make table of party coefficients ----------------------------------------

# Table 2 in paper

# parse gamma coeffients like i did above
gammas = subset(params, grepl("gamma", parname))


# parse the parameter names into respondent id, factor, and level 
fact =  str_extract(gammas$parname, "gamma_[a-z]*")
index = str_replace(gammas$parname, fact, "")
index = str_replace(index, "(\\[)", "")
index = str_replace(index, "(\\])", "")
index = str_split(index, ",")
index = as.matrix(do.call(rbind, index))
gammas$factor = fact
gammas$level = as.numeric(index[, 1])
gammas$covariate = as.numeric(index[,2])



# clean and label
gammas$factor2 = car::recode(
  gammas$factor, 
  "'gamma_educ' = 'Education';
  'gamma_hieduc' = 'Higher Education';
  'gamma_invest' = 'Investment & Taxes';
  'gamma_gov' = 'Governance'; 
  'gamma_workers' = 'Workers & Entrepreneurs';
  'gamma_local' = 'Local Services';
  'gamma_intercept' = 'Intercept'"
)
educ_recode = "1 = 'Charter schools'; 2 = 'Vouchers to schools'; 3 = 'Free pre-school'; 4 = 'Pay teachers more'"
hieduc_recode = "1 = 'Community colleges'; 2 = 'Local public universities'; 3 = 'Technical vocational training'; 4 = 'Student grant programs'"
invest_recode = "1 = 'Attract new businesses'; 2 = 'Stimulate existing companies'; 3 = 'Encourage investment by charities'"
gov_recode = "1 = 'Consolidate local government'; 2 = 'More power to the state'"
workers_recode = "1 = 'Limit unions\\' power'; 2 = 'Expand unions\\' power'; 3 = 'Worker training vouchers'; 4 = 'Tax breaks to entrepreneurs'"
local_recode =  "1 = 'Affordable housing'; 2 = 'Public transportation'; 3 = 'Public safety and crime prevention'"

for (f in unique(gammas$factor)) {
  if(f == "gamma_intercept") {
    gammas$level_code[gammas$factor==f] = "Strong Rep. - Strong Dem."
    next
  }
  rec = paste0(str_replace(f, "gamma_", ""), "_recode")
  gammas$level_code[gammas$factor == f] = car::recode(gammas$level[gammas$factor == f], get(rec))
}

# reorder levels for visualization
lev_order = c("Keep current", 
              "Vouchers to schools", 
              "Pay teachers more", 
              "Free pre-school", 
              "Charter schools", 
              "Technical vocational training", 
              "Student grant programs", 
              "Local public universities", 
              "Community colleges", 
              "Stimulate existing companies", 
              "Encourage investment by charities", 
              "Attract new businesses", 
              "More power to the state", 
              "Consolidate local government", 
              "Worker training vouchers", 
              "Tax breaks to entrepreneurs", 
              "Limit unions\' power", 
              "Expand unions\' power", 
              "Public transportation", 
              "Public safety and crime prevention",
              "Affordable housing",
              "Strong Rep. - Strong Dem."
)
gammas$level_code = factor(gammas$level_code, levels = lev_order)
gammas$factor2 = factor(
  gammas$factor2, 
  levels = c("Investment & Taxes", "Workers & Entrepreneurs","Local Services",
             "Governance", "Education", "Higher Education", "Intercept")
)

# match individual covariates to numbers, based on what's in 03analyze_pid7_socioecon.R
covs = c("Intercept", 
         "Party: Weak Dem.", 
         "Party: Lean Dem.",
         "Party: Independent",
         "Party: Lean Rep.",
         "Party: Weak Rep.",
         "Party: Strong Rep.",
         "Age: 31-50", "Age: 51-65", "Age: 65+", 
         "Race: Black", "Race: Latino", "Race: Other",
         "Female", 
         "College", 
         "Income: Second Quartile", "Income: Third Quartile", "Income: Fourth Quartile", 
         "Looking for Work", 
         "Homeowner", 
         "Years in MSA: 1-5", "Years in MSA: 6-10", "Years in MSA: 11-15", 
         "Years in MSA: 16+",
         "MSA: Cleveland", "MSA: Houston", "MSA: Indianapolis",
         "MSA: Memphis", "MSA: Rochester", "MSA: St. Louis", 
         "MSA: Seattle")
covs_recode = data.frame(covariate = 1:length(covs), covariate2 = covs)
gammas = left_join(gammas, covs_recode, by = "covariate")


# Plot strong partisan point estimates and 95% CI's -- coefplot corresponding
# to Table 2 (this plot didn't make it into the paper)
gammas %>% 
  filter(covariate2 == "Party: Strong Rep.") %>% 
  ggplot() + 
  aes(x = mean, xmin = `2.5%`, xmax = `97.5%`, y = level_code) + 
  geom_vline(xintercept = 0, lty=3) + 
  geom_errorbarh(height = 0) + 
  geom_point() + 
  facet_wrap(~factor2, drop = TRUE, scales = "free_y", ncol=1) + 
  labs(x = "Strong Republican -\nStrong Democrat Difference", 
       y = NULL) + 
  theme(panel.spacing = unit(0,"in"))
ggsave("Output/extras/gammas_strong_partisan.pdf", width=5, height=7)



# Make Table 2
table = gammas %>% 
  filter(covariate2 == "Party: Strong Rep.") %>% 
  select(factor2, level_code, mean, sd, `2.5%`, `97.5%`)

tab_order = lev_order[-1]
table$level_code = factor(table$level_code, tab_order)
table$factor2 = factor(
  table$factor2,
  c("Investment & Taxes", "Workers & Entrepreneurs","Local Services",
    "Governance", "Education", "Higher Education", "Intercept") 
)
table = table %>% arrange(factor2, level_code)
table = table %>% 
  mutate(signif = sign(`2.5%`) == sign(`97.5%`)) %>% 
  mutate(mean = format(round(mean, 3), nsmall = 3),
         sd   = format(round(sd,   3), nsmall = 3),
         `2.5%` = format(round(`2.5%`, 2), nsmall = 2),
         `97.5%` = format(round(`97.5%`, 2), nsmall = 2)) %>% 
  mutate(mean = ifelse(signif, paste0(mean, "^{*}"), mean)) %>% 
  select(-signif)

# add a "phantom" minus sign in CI's for spacing
table = table %>% 
  mutate(`2.5%` = ifelse(grepl("-", `2.5%`), `2.5%`, paste0("\\phantom{-}", `2.5%`)),
         `97.5%` = ifelse(grepl("-", `97.5%`), `97.5%`, paste0("\\phantom{-}", `97.5%`)))

table = table %>% 
  mutate(ci = paste0("[", `2.5%`, ", ", `97.5%`, "]")) %>% 
  mutate(sd = paste0("(", sd, ")")) %>% 
  select(-`2.5%`, -`97.5%`) %>% 
  mutate(factor2 = as.character(factor2)) %>% 
  mutate(factor2 = str_replace(factor2, fixed("&"), "\\&")) %>% 
  mutate(factor2 = c("Investment \\& Taxes", "", "", 
                     "Workers \\& Entrepreneurs", "", "", "", 
                     "Local Services", "", "",
                     "Governance", "", 
                     "Education", "", "", "", 
                     "Higher Education", "", "", "",
                     "Intercept"
  ))

header = "\\begin{tabular}{rrD{.}{.}{3}D{.}{.}{3}D{.}{.}{2}r} \n \\toprule \n Plan Dimension & Level &  \\multicolumn{1}{c}{Mean} &  \\multicolumn{1}{c}{Post. \\textsc{SD}} & \\multicolumn{1}{c}{95\\% \\textsc{CI}} & \\hspace{7mm} \\\\"
table$filler = "\\\\"
table = table %>% 
  mutate(factor2 = ifelse(factor2!="", paste0("\\midrule ", factor2), factor2))
table = apply(table, 1, paste, collapse = "  &  ")
# table = paste(table, "\\\\")
table[length(table)] = paste0(table[length(table)], "\\bottomrule")
footer = "\\end{tabular}"
table = c(header, table, footer)
cat(table, sep = "\n", file = "Output/extras/gammas_pid7.tex")
cat(table, sep="\n")
